############################ CODE FOR CONTROLLED DIRECT EFFECTS MSM
rm(list=ls())
library(plyr); library(parallel); library(foreach);
library(MASS); library(mice);
library(boot);library(mgcv);library(CORElearn)
library(fmsb); library(survey);library('BART')
library(readstata13); library(dummies)
library(QuantPsyc); library(doParallel)

sink(file="replication.log")
print(sessionInfo())

# Import functions relevant to the models
# Collection of hand-made functions to run MSMs with multi-categorial treatments, pool estimates, etc.
source("IPTWFunctions.R") 

######  SECTION 2. ESTIMATING CDE USING MARGINAL STRUCTURAL MODELS

## SECTION 2.2 BENEFITS OF THE PSEUDO-SAMPLE (TABLES 1 AND 2)

## Create demonstrative data
demdat = data.frame(Z0=rep(0:2, each=6), Z1=rep(rep(0:2, each=2), 3), X1=rep(0:1, 9), 
                    N=c(12000,6000,4000,5000,1000,2000,7000,4000,5000,7000,3000,4000,2000,1000,2000,3000,1000,6000))

## Calculate conditional probabilities, weights and pseudo-sample quantities
demdat$PZ1.Z0 = round(apply(demdat[,1:2], 1, function(x) sum(demdat$N[(demdat$Z0==x[1] & demdat$Z1==x[2])])/sum(demdat$N[demdat$Z0==x[1]])),3)
demdat$PZ1.Z0X1 = round(apply(demdat[,1:3], 1, function(x) sum(demdat$N[(demdat$Z0==as.numeric(x[1]) & demdat$Z1==as.numeric(x[2]) & demdat$X1==as.numeric(x[3]))])/sum(demdat$N[(demdat$Z0==x[1] & demdat$X1==x[3])])),3)
demdat$Wtinv = round(apply(demdat[,5:6], 1, function(x) (x[1])/x[2]),3)
demdat$Npseudo = round(apply(demdat[,c(4,7)], 1, function(x) x[1]*x[2]),3)
demdat$PZ1.Z0X1.pseudo = round(apply(demdat[,c(1,2,3)], 1, function(x) sum(demdat$Npseudo[(demdat$Z0==as.numeric(x[1]) & demdat$Z1==as.numeric(x[2]) & demdat$X1==as.numeric(x[3]))])/sum(demdat$Npseudo[(demdat$Z0==x[1] & demdat$X1==x[3])])),3)

## Table 1
print("Table 1")
demdat[c(1:6,13:18),c(1,2,3,5,6,7,4,8)]

## Table 2
print("Table 2")
(table2 = demdat[demdat$Z1==2,c(1,3,6,9)])

## SECTION 2.3: WEIGHTING: METHODS AND IMPLICATIONS (FIGURE 3)

# Sample size and IDs
n <- 1000
id <- 1:n

# Create empty matrixes that will store results of each simulation in each row
ests_bw_ologit <- ests_bw_splines <- ests_bw_rf <- real_probs <- ests_bw_saturate <- ests_bw_ologit_trim <- ests_bw_splines_trim <- ests_bw_rf_trim  <- wts_ologit <- wts_ologit_trim <- wts_splines_trim <- wts_splines <-  wts_rf_trim <- wts_rf <- NULL

# Set a seed
set.seed(2803)
for(k in 1:500){
  #### SIMULATE DATA
  # First stage: X_0
  x0 <- sample(0:1, n, replace=TRUE, prob=c(0.6,0.4))
  
  # Treatment: T_0
  u_t0 = rnorm(n,0,0.5)
  eta_t0 <- -2.5 + x0*1.5 + u_t0
  cuts_t0 <- c(-1.25, .45)
  
  probs_t0 <- simOLOG(eta_t0, cuts_t0)
  summary(probs_t0)
  t0 <- apply(probs_t0,1,function(x) sample(0:2,1,prob = x))
  
  # Second stage: X_1
  u_x1 = rnorm(n,0,0.35)
  
  # X1 under control and treatment
  # Under t0=0
  eta_x1_0 <- -2.5 + x0*0.5 + 0*1.1 - u_x1
  probs_x1_0 <- ilogit(eta_x1_0)
  x1_0 <- sapply(probs_x1_0,function(x) sample(0:1,1,prob=c(1-x,x)))
  
  # Under t0=1
  eta_x1_1 <- -2.5 + x0*0.5 + 1*1.1 - u_x1
  probs_x1_1 <- ilogit(eta_x1_1)
  x1_1 <- sapply(probs_x1_1,function(x) sample(0:1,1,prob=c(1-x,x)))
  
  # Under t0=2
  eta_x1_2 <- -2.5 + x0*0.5 + 2*1.1 - u_x1
  probs_x1_2 <- ilogit(eta_x1_2)
  x1_2 <- sapply(probs_x1_2,function(x) sample(0:1,1,prob=c(1-x,x)))
  
  x1 <- ifelse(t0==0,x1_0,ifelse(t0==1, x1_1, x1_2))
  #table(x1)
  
  # Treatment: T_1
  u_t1 = rnorm(n,0,0.5)
  cuts_t1 <- c(-1.05, .65)
  
  eta_t1_0 <- -3.5 + x0*0.2 + x1_0*0.6 + 0*1 + u_t1
  probs_t1_0 <- simOLOG(eta_t1_0, cuts_t1)
  t1_0 <- apply(probs_t1_0,1,function(x) sample(0:2,1,prob = x)) 
  
  eta_t1_1 <- -3.5 + x0*0.2 + x1_1*0.6 + 1*1 + u_t1
  probs_t1_1 <- simOLOG(eta_t1_1, cuts_t1)
  t1_1 <- apply(probs_t1_1,1,function(x) sample(0:2,1,prob = x)) 
  
  eta_t1_2 <- -3.5 + x0*0.2 + x1_2*0.6 + 2*1 + u_t1
  probs_t1_2 <- simOLOG(eta_t1_2, cuts_t1)
  t1_2 <- apply(probs_t1_2,1,function(x) sample(0:2,1,prob = x)) 
  
  t1 <- ifelse(t0==0,t1_0, ifelse(t0==1,t1_1, t1_2))
  
  # Potential outcomes
  u_y = rnorm(n,0,0.4)
  # T0=0 & T1=0
  eta_y00 <- -3 + x0*0.2 + 0*1.5 + x1_0*0.4 + 0.2*0 + u_y
  probs_y00 <- ilogit(eta_y00)
  y00 <- sapply(probs_y00,function(x) sample(0:1, 1, prob=c(1-x,x)))
  
  # T0=0 & T1=1
  eta_y01 <- -3 + x0*0.2 + 0*1.5 + x1_0*0.4 + 0.2*1 + u_y
  probs_y01 <- ilogit(eta_y01)
  y01 <- sapply(probs_y01,function(x) sample(0:1, 1, prob=c(1-x,x)))
  
  # T0=0 & T1=2
  eta_y02 <- -3 + x0*0.2 + 0*1.5 + x1_0*0.4 + 0.2*2 + u_y
  probs_y02 <- ilogit(eta_y02)
  y02 <- sapply(probs_y02,function(x) sample(0:1, 1, prob=c(1-x,x)))
  
  # T0=1 & T1=0
  eta_y10 <- -3 + x0*0.2 + 1*1.5 + x1_1*0.4 + 0.2*0 + u_y
  probs_y10 <- ilogit(eta_y10)
  y10 <- sapply(probs_y10,function(x) sample(0:1, 1, prob=c(1-x,x)))
  
  # T0=1 & T1=1
  eta_y11 <- -3 + x0*0.2 + 1*1.5 + x1_1*0.4 + 0.2*1 + u_y
  probs_y11 <- ilogit(eta_y11)
  y11 <- sapply(probs_y11,function(x) sample(0:1, 1, prob=c(1-x,x)))
  
  # T0=1 & T1=2
  eta_y12 <- -3 + x0*0.2 + 1*1.5 + x1_1*0.4 + 0.2*2 + u_y
  probs_y12 <- ilogit(eta_y12)
  y12 <- sapply(probs_y12,function(x) sample(0:1, 1, prob=c(1-x,x)))
  
  # T0=2 & T1=0
  eta_y20 <- -3 + x0*0.2 + 2*1.5 + x1_2*0.4 + 0.2*0 + u_y
  probs_y20 <- ilogit(eta_y20)
  y20 <- sapply(probs_y20,function(x) sample(0:1, 1, prob=c(1-x,x)))
  
  # T0=2 & T1=1
  eta_y21 <- -3 + x0*0.2 + 2*1.5 + x1_2*0.4 + 0.2*1 +  u_y
  probs_y21 <- ilogit(eta_y21)
  y21 <- sapply(probs_y21,function(x) sample(0:1, 1, prob=c(1-x,x)))
  
  # T0=2 & T1=2
  eta_y22 <- -3 + x0*0.2 + 2*1.5 + x1_2*0.4 + 0.2*2 + u_y
  probs_y22 <- ilogit(eta_y22)
  y22 <- sapply(probs_y22,function(x) sample(0:1, 1, prob=c(1-x,x)))
  
  # And Y based on observed assignment
  y <- ifelse((t0==0 & t1==0), y00, ifelse((t0==0 & t1==1), y01, ifelse((t0==0 & t1==2), y02,
                                                                        ifelse((t0==1 & t1==0), y10, ifelse((t0==1 & t1==1), y11, ifelse((t0==1 & t1==2), y12,
                                                                                                                                         ifelse((t0==2 & t1==0), y20, ifelse((t0==2 & t1==1), y21, y22))))))))
  
  # Positivity assumption
  table(t0[(x0==0 & x1==0)],t1[(x0==0 & x1==0)])
  table(t0[(x0==0 & x1==1)],t1[(x0==0 & x1==1)])
  table(t0[(x0==1 & x1==0)],t1[(x0==1 & x1==0)])
  table(t0[(x0==1 & x1==1)],t1[(x0==1 & x1==1)])
  
  # Some quick clean-up of the data to avoid breaking the functions
  simdat <- data.frame(y, t0=as.factor(t0), t1=as.factor(t1), x0, x1)
  simdat$t0.rec <- as.numeric(as.factor(simdat$t0))
  simdat$t1.rec <- as.numeric(as.factor(simdat$t1))
  
  ########## ESTIMATE REAL PROBABILITIES AND NAIVE MODEL
  real_probs <- rbind(real_probs, c((table(y00)/1000)[2],(table(y01)/1000)[2],(table(y02)/1000)[2],
                                    (table(y10)/1000)[2],(table(y11)/1000)[2],(table(y12)/1000)[2],
                                    (table(y20)/1000)[2],(table(y21)/1000)[2],(table(y22)/1000)[2]))
  ests_bw_saturate <- rbind(ests_bw_saturate, summary(glm(y ~ as.factor(t0) + as.factor(t1) + as.factor(t0):as.factor(t1) + x0 + x1,
                                                          data=simdat))$coefficients[,1])
  ########## ESTIMATE BOOTWEIGHTS OBJECT WITH WEITGTHS AND COEFFICIENTS OUTPUT
  # List ologit
  temp_ologit <- bootWeights(simdat,matrix(c('as.factor(t0) ~ x0','as.factor(t0) ~ 1',
                                             'as.factor(t1) ~ x1+as.factor(t0)+x0', 'as.factor(t1) ~ as.factor(t0)'), ncol=2, byrow = TRUE),
                             'y ~ as.factor(t0) + as.factor(t1) + as.factor(t0):as.factor(t1)', 'ologit', 
                             treat_ind = c(2,3), num_knots=3, num_cats=3, indices=1:1000, out='both')
  # Beta ologit
  ests_bw_ologit <- rbind(ests_bw_ologit, temp_ologit[[2]])
  # Weights ologit
  wts_ologit <- rbind(wts_ologit,temp_ologit[[1]])
  
  #List splines
  temp_splines <- bootWeights(simdat, formulas=matrix(c('t0.rec ~ x1','t0.rec ~ 1',
                                                        't1.rec ~ x1+t0.rec+x0', 't1.rec ~ t0.rec'), ncol=2, byrow = TRUE),
                              outmodel='y ~ as.factor(t0) + as.factor(t1) + as.factor(t0):as.factor(t1)', method='splines', 
                              treat_ind = c(6,7), num_knots=3, num_cats=3, indices=1:1000, out='both')
  # Beta splines
  ests_bw_splines <-rbind(ests_bw_splines,  temp_splines[[2]])
  # Weights splines
  wts_splines <- rbind(wts_splines,temp_splines[[1]])
  
  #List RF
  temp_rf <- bootWeights(simdat, formulas=matrix(c('t0.rec ~ x1','t0.rec ~ 1',
                                                   't1.rec ~ x1+t0.rec+x0', 't1.rec ~ t0.rec'), ncol=2, byrow = TRUE),
                         outmodel='y ~ as.factor(t0) + as.factor(t1) + as.factor(t0):as.factor(t1)', method='rf', 
                         treat_ind = c(2,3), num_knots=3, num_cats=3, indices=1:1000, out='both')
  # Beta RF
  ests_bw_rf <- rbind(ests_bw_rf, temp_rf[[2]])
  # Weights RF
  wts_rf <- rbind(wts_rf,temp_rf[[1]])
  
  ############ SAME THING BUT WITH TRIMMED WEIGHTS
  # List ologit (trim)
  temp_ologit_trim <- bootWeights(simdat,matrix(c('as.factor(t0) ~ x0','as.factor(t0) ~ 1',
                                                  'as.factor(t1) ~ x1+as.factor(t0)+x0', 'as.factor(t1) ~ as.factor(t0)'), ncol=2, byrow = TRUE),
                                  'y ~ as.factor(t0) + as.factor(t1) + as.factor(t0):as.factor(t1)', 'ologit', 
                                  treat_ind = c(2,3), num_knots=3, num_cats=3, indices=1:1000, out='both', trim = TRUE)
  # Beta ologit (trim)
  ests_bw_ologit_trim <- rbind(ests_bw_ologit_trim, temp_ologit_trim[[2]])
  # Weights ologit (trim)
  wts_ologit_trim <- rbind(wts_ologit_trim,temp_ologit_trim[[1]])
  
  # List splines (trim)
  temp_splines_trim <- bootWeights(simdat, formulas=matrix(c('t0.rec ~ x1','t0.rec ~ 1',
                                                             't1.rec ~ x1+t0.rec+x0', 't1.rec ~ t0.rec'), ncol=2, byrow = TRUE),
                                   outmodel='y ~ as.factor(t0) + as.factor(t1) + as.factor(t0):as.factor(t1)', method='splines', 
                                   treat_ind = c(6,7), num_knots=3, num_cats=3, indices=1:1000, out='both', trim= TRUE)
  # Beta splines (trim)
  ests_bw_splines_trim <-rbind(ests_bw_splines_trim, temp_splines_trim[[2]])
  # Weights splines (trim)
  wts_splines_trim <- rbind(wts_splines_trim,temp_splines_trim[[1]])
  
  # List RF (trim)
  temp_rf_trim <- bootWeights(simdat, formulas=matrix(c('t0.rec ~ x1','t0.rec ~ 1',
                                                        't1.rec ~ x1+t0.rec+x0', 't1.rec ~ t0.rec'), ncol=2, byrow = TRUE),
                              outmodel='y ~ as.factor(t0) + as.factor(t1) + as.factor(t0):as.factor(t1)', method='rf', 
                              treat_ind = c(2,3), num_knots=3, num_cats=3, indices=1:1000, out='both', trim=TRUE)
  # Beta RF (trim)
  ests_bw_rf_trim <- rbind(ests_bw_rf_trim, temp_rf_trim[[2]])
  # Weights RF (trim)
  wts_rf_trim <- rbind(wts_rf_trim,temp_rf_trim[[1]])
  print(paste('Iteration:',k, 'completed', sep=" "))
}

# Predicted probabilities
probs_bw_ologit <- t(apply(ests_bw_ologit,1,function(x) getprpr(as.numeric(x))))
bias_bw_ologit <- probs_bw_ologit-real_probs
meanbias_bw_ologit <- apply(bias_bw_ologit,2,mean)

probs_bw_splines <- t(apply(ests_bw_splines,1,function(x) getprpr(as.numeric(x))))
bias_bw_splines <- probs_bw_splines-real_probs
meanbias_bw_splines <- apply(bias_bw_splines,2,mean)

probs_bw_rf <- t(apply(ests_bw_rf,1,function(x) getprpr(as.numeric(x))))
bias_bw_rf <- probs_bw_rf-real_probs
meanbias_bw_rf <- apply(bias_bw_rf,2,mean)

probs_bw_saturate <- t(apply(ests_bw_saturate[,1:9],1,function(x) getprpr(as.numeric(x))))
bias_bw_saturate <- probs_bw_saturate-real_probs
meanbias_bw_saturate <- apply(bias_bw_saturate,2,mean)

probs_bw_ologit_trim <- t(apply(ests_bw_ologit_trim,1,function(x) getprpr(as.numeric(x))))
bias_bw_ologit_trim <- probs_bw_ologit_trim-real_probs
meanbias_bw_ologit_trim <- apply(bias_bw_ologit_trim,2,mean)

probs_bw_splines_trim <- t(apply(ests_bw_splines_trim,1,function(x) getprpr(as.numeric(x))))
bias_bw_splines_trim <- probs_bw_splines_trim-real_probs
meanbias_bw_splines_trim <- apply(bias_bw_splines_trim,2,mean)

probs_bw_rf_trim <- t(apply(ests_bw_rf_trim,1,function(x) getprpr(as.numeric(x))))
bias_bw_rf_trim <- probs_bw_rf_trim-real_probs
meanbias_bw_rf_trim <- apply(bias_bw_rf_trim,2,mean)

meanbias <- rbind(rep(0.6,9),rep(0,9),meanbias_bw_ologit,meanbias_bw_splines,meanbias_bw_rf,meanbias_bw_saturate)
meanbias <- abs(meanbias)

##### CDEs
real_cdes <- t(apply(real_probs,1, getcdes))

cdes_bw_saturate <- t(apply(probs_bw_saturate, 1, getcdes))
cdes_bw_ologit <- t(apply(probs_bw_ologit, 1, getcdes))
cdes_bw_ologit_trim <- t(apply(probs_bw_ologit_trim, 1, getcdes))
cdes_bw_rf <- t(apply(probs_bw_rf, 1, getcdes))
cdes_bw_rf_trim <- t(apply(probs_bw_rf_trim, 1, getcdes))
cdes_bw_splines <- t(apply(probs_bw_splines, 1, getcdes))
cdes_bw_splines_trim <- t(apply(probs_bw_splines_trim, 1, getcdes))

bias_cdes_bw_saturate = cdes_bw_saturate - real_cdes
bias_cdes_bw_ologit = cdes_bw_ologit - real_cdes
bias_cdes_bw_ologit_trim = cdes_bw_ologit_trim - real_cdes
bias_cdes_bw_rf = cdes_bw_rf - real_cdes
bias_cdes_bw_rf_trim = cdes_bw_rf_trim - real_cdes
bias_cdes_bw_splines = cdes_bw_splines - real_cdes
bias_cdes_bw_splines_trim = cdes_bw_splines_trim - real_cdes

c(min(apply(bias_cdes_bw_saturate,2,mean)),max(apply(bias_cdes_bw_saturate,2,mean)))
c(min(apply(bias_cdes_bw_ologit,2,mean)),max(apply(bias_cdes_bw_ologit,2,mean)))
c(min(apply(bias_cdes_bw_rf,2,mean)),max(apply(bias_cdes_bw_rf,2,mean)))
c(min(apply(bias_cdes_bw_splines,2,mean)),max(apply(bias_cdes_bw_splines,2,mean)))

meanbias_cde <- rbind(rep(0.5,9),rep(0,9),apply(bias_cdes_bw_ologit,2,mean),
                      apply(bias_cdes_bw_splines,2,mean),
                      apply(bias_cdes_bw_rf,2,mean),
                      apply(bias_cdes_bw_saturate,2,mean))

meanbias_cde <- abs(meanbias_cde)
colnames(meanbias) <- c("Y[t0=0,t1=0]","Y[t0=0,t1=1]","Y[t0=0,t1=2]",
                        "Y[t0=1,t1=0]","Y[t0=1,t1=1]","Y[t0=1,t1=2]",
                        "Y[t0=2,t1=0]","Y[t0=2,t1=1]","Y[t0=2,t1=2]")

mycols = c('blueviolet', 'deeppink', 'darkolivegreen2', 'darkturquoise')
newcols = sapply(1:4, function(x) t_col(mycols[x], c(40,40,40,40)[x]))
a=10
combs <- matrix(c(0,0,0,1,0,2,1,0,1,1,1,2,2,0,2,1,2,2),ncol=2,byrow = TRUE)
labs = as.expression(apply(combs,1, function(x) bquote(paste('Y'['Z'^'(0)']['='][.(x[1])][', Z'^'(1)']['='][.(x[2])]))))
labs = paste('CDE', 1:9, sep=" ")

pdf('bias_spider2.pdf')
layout(matrix(c(1,2),ncol=1), heights = c(0.9,0.1))
par(mar=c(3,3,2,2),oma=c(2,2,1,1))
radarchart(data.frame(meanbias), axistype=1 , 
           #custom polygon
           pcol=newcols , plwd=2 , plty=1,
           #custom the grid
           cglcol="grey", cglty=1, axislabcol="grey", caxislabels=seq(0,0.6,.15), cglwd=0.8,
           vlabels = labs,
           #custom labels
           vlcex=0.8 
)
par(mar=c(0,0,0,0))
plot(0,0, type="n", axes=FALSE, xaxt="n", yaxt="n")
legend('center',legend = c('Logit', 'GAM', 'Random \n Forest', 'Naive'), bty = "n", pch=20 , col=newcols , text.col = "grey30", cex=0.8, pt.cex=1, ncol=4)
dev.off()

###### SECTION 3: MSMs and controlled direct effects in practice: the effect of parents’ income on political participation

## IMPORT, CLEAN, RUN MULTIPLE IMPUTATION AND EXTRACT IMPUTED DATASETS
# Data
data82 <- read.dta13("Data/Final_Data_Students_7382.dta", convert.factors = TRUE)

# Recode the outcome variables
# Donate money
data82$money82 <- ifelse(as.numeric(data82$V341)==1,1,
                         ifelse(as.numeric(data82$V341)==2,0,NA))
# Rally
data82$rally82 <- ifelse(as.numeric(data82$V320)==1,1,
                         ifelse(as.numeric(data82$V320)==2,0,NA))

# Imputation
m <- 10

# Data names: remove the ID for imputation
names(data82)
data82 <- data82[,-1]

data82_1 <- data82[!is.na(data82$money82) & !is.na(data82$rally82),] # Drop cases with missing data in both variables
table(apply(data82, 2, function(x) sum(is.na(x))))

# Variables for imputation
vars65 <- c("educ_dad", "educ_mom", "race_hoh", "polint_hoh", "qinc_fam",
                               "occup_hoh", "fnews_hoh", "ownhome_hoh", "pid3_hoh", "cosmo_hoh")

vars82_1 <- c(vars65, c("money82", "rally82",
                        "educ_82", "polint_82", "eff_82", "polknow_82",
                        "qinc_82", "occup_82", "ownhome_82", "cosmo_82", "pid3_82",
                        "race_st73", "gender_st65"))

# Seed:
imp82_1 <- data82_1[,vars82_1]
dataimp82_1 <- mice(imp82_1, m, seed=040719)
###

# Remove unnecessary data
rm(data82_1, imp82_1)

# Extract imputed datasets
ldata82_1 <- llply(1:m, function(x) complete(dataimp82_1,x))
names(ldata82_1[[1]])

## MAKE CLUSTER WITH RELEVANT VARIABLES
xcovs82_1 <- list(c("educ_dad", "educ_mom",
                    "race_hoh", "polint_hoh"),
                  c("educ_82", "polint_82",
                    "eff_82", "polknow_82"))
atreat82_1 <- c("qinc_fam", "qinc_82")

num.clusters = 4 # Change according to your computer
cl <- makeCluster(num.clusters, type="FORK")  # Use 18 cores: CHANGE TO YOUR SPECIFIC COMPUTATIONAL POWER. TAKES ABOUT 3 MIN TO RUN IN 18 CORES
clusterExport(cl, c("atreat82_1","xcovs82_1"))


## ESTIMATE THE MODELS IN EACH IMPUTED DATASET (TABLE 3)
ests82_1.rally <- parLapply(cl, ldata82_1, fun = function(x) bootMSM_binout(x, Y = "rally82",
                                                                             A = atreat82_1,
                                                                             X.covs =  xcovs82_1 ,
                                                                             t = 2, 
                                                                             formula = "rally82 ~ as.factor(qinc_fam) + as.factor(qinc_82) + as.factor(race_st73) + gender_st65",
                                                                             type.treat ="ordinal",
                                                                             iter = 500,
                                                                             seed = 231286))


poolests82_2_msm <- poolEstimates(ests82_1.rally, index=1:10, alpha=0.05) 
poolests82_2_over <- poolEstimates(ests82_1.rally, index = 11:28,alpha=0.05)
poolests82_2_under <- poolEstimates(ests82_1.rally, index=29:38, alpha=0.05)

# Table 3, Columns 1 to 3 [ATTEND A RALLY]
round(poolests82_2_msm[1:7,],3)
round(poolests82_2_over[1:7,],3)
round(poolests82_2_under[1:7,],3)

ests82_1.money <- parLapply(cl, ldata82_1, fun = function(x) bootMSM_binout(x, Y = "money82",
                                                                            A = atreat82_1,
                                                                            X.covs =  xcovs82_1 ,
                                                                            t = 2, 
                                                                            formula = "money82 ~ as.factor(qinc_fam) + as.factor(qinc_82) + as.factor(race_st73) + gender_st65",
                                                                            type.treat ="ordinal",
                                                                            iter = 500,
                                                                            seed = 040719))

poolests82_1_msm <- poolEstimates(ests82_1.money, index=1:10, alpha=0.05) 
poolests82_1_over <- poolEstimates(ests82_1.money, index = 11:28,alpha=0.05)
poolests82_1_under <- poolEstimates(ests82_1.money, index=29:38, alpha=0.05)

# Table 3, Columns 4 to 6 [DONATE MONEY]
round(poolests82_1_msm[1:7,],3)
round(poolests82_1_over[1:7,],3)
round(poolests82_1_under[1:7,],3)

library(stargazer)
modbase = glm(rally82 ~ as.factor(qinc_fam) + as.factor(qinc_82), data = ldata82_1[[1]])
modbase2 = glm(money82 ~ as.factor(qinc_fam) + as.factor(qinc_82), data = ldata82_1[[1]])
rownames(poolests82_2_msm)[1:7] <- rownames(poolests82_2_over)[1:7] <- rownames(poolests82_2_under)[1:7] <- rownames(poolests82_1_msm)[1:7] <- rownames(poolests82_1_over)[1:7] <- rownames(poolests82_1_under)[1:7] <- rownames(summary(modbase)$coefficients)

stargazer(modbase, modbase, modbase, modbase2, modbase2, modbase2,
          no.space = TRUE, style = "apsr",
          title= "Controlled direct effects of early income on participation",
          out= "table3.tex",
          column.labels = rep(c("MSM", "Over control", "Under control"), 2),
          covariate.labels = rep(c("Quartile 2", "Quartile 3", "Quartile 4"),2),
          dep.var.labels = c("Attend a rally", "Donate money"),
          coef = list(poolests82_2_msm[1:7,1],poolests82_2_over[1:7,1],poolests82_2_under[1:7,1],
                      poolests82_1_msm[1:7,1],poolests82_1_over[1:7,1],poolests82_1_under[1:7,1]),
          se = list(poolests82_2_msm[1:7,2],poolests82_2_over[1:7,2],poolests82_2_under[1:7,2],
                    poolests82_1_msm[1:7,2],poolests82_1_over[1:7,2],poolests82_1_under[1:7,2]),
          p = list(poolests82_2_msm[1:7,4],poolests82_2_over[1:7,4],poolests82_2_under[1:7,4],
                   poolests82_1_msm[1:7,4],poolests82_1_over[1:7,4],poolests82_1_under[1:7,4]),
          digits = 3, keep.stat = "n", star.cutoffs = c(0.05, NA, NA))

stopCluster(cl)


######################## APPENDIX
## Table SA.1
print("Table SA.1")
demdat[,c(1,2,3,5,6,7,4,8)]

## Figure SA.1
#####################################################
##  SIMULATIONS
#####################################################
registerDoParallel(4) # Change number of cores
bias_msm <- bias_sat <- list()
nsims=400
seq1 = seq(60,2000, 100)
set.seed(2803)

# Change values of n
for (i in 1:length(seq1)){
  print(i)
  list_sims = llply(1:nsims, function(y) simdatnew2(n=seq1[i],beta.x0_t0=3), .parallel = TRUE) 
  true_cdes = do.call(rbind.fill, llply(list_sims, function(x) data.frame(t(x[[1]]))))
  sat_cdes = do.call(rbind.fill, llply(list_sims, function(x) data.frame(t(x[[2]]))))
  msm_cdes = do.call(rbind.fill, llply(list_sims, function(x) data.frame(t(x[[3]]))))
  bias_msm[[i]] = msm_cdes - true_cdes
  bias_sat[[i]] = sat_cdes - true_cdes
}

mean_bias_msm1 <- do.call(rbind, lapply(bias_msm, function(x) apply(x, 2, mean, na.rm=TRUE)))
sd_bias_msm1 <- do.call(rbind,lapply(bias_msm, function(x) apply(x, 2, sd, na.rm=TRUE)))
mean_bias_sat1 <- do.call(rbind,lapply(bias_sat, function(x) apply(x, 2, mean, na.rm=TRUE)))
sd_bias_sat1 <- do.call(rbind, lapply(bias_sat, function(x) apply(x, 2, sd, na.rm=TRUE)))

# Change values of beta0
bias_msm2 <- bias_sat2 <- list()
seq2 = seq3 = seq(0,5,0.1)

for (i in 1:length(seq2)){
  print(i)
  list_sims = llply(1:nsims, function(y) simdatnew(n=1000, beta.x0_t0=seq2[i]), .parallel = TRUE)
  true_cdes = do.call(rbind.fill, llply(list_sims, function(x) data.frame(t(x[[1]]))))
  sat_cdes = do.call(rbind.fill, llply(list_sims, function(x) data.frame(t(x[[2]]))))
  msm_cdes = do.call(rbind.fill, llply(list_sims, function(x) data.frame(t(x[[3]]))))
  bias_msm2[[i]] = msm_cdes - true_cdes
  bias_sat2[[i]] = sat_cdes - true_cdes
}

mean_bias_msm2 <- do.call(rbind, lapply(bias_msm2, function(x) apply(x, 2, mean, na.rm=TRUE)))
sd_bias_msm2 <- do.call(rbind,lapply(bias_msm2, function(x) apply(x, 2, sd, na.rm=TRUE)))
mean_bias_sat2 <- do.call(rbind,lapply(bias_sat2, function(x) apply(x, 2, mean, na.rm=TRUE)))
sd_bias_sat2 <- do.call(rbind, lapply(bias_sat2, function(x) apply(x, 2, sd, na.rm=TRUE)))

# Change values of gamma1
bias_msm3 <- bias_sat3 <- list()

for (i in 1:length(seq3)){
  print(i)
  list_sims = llply(1:nsims, function(y) simdatnew(beta.w1_t1=seq3[i]), .parallel = TRUE) 
  true_cdes = do.call(rbind.fill, llply(list_sims, function(x) data.frame(t(x[[1]]))))
  sat_cdes = do.call(rbind.fill, llply(list_sims, function(x) data.frame(t(x[[2]]))))
  msm_cdes = do.call(rbind.fill, llply(list_sims, function(x) data.frame(t(x[[3]]))))
  bias_msm3[[i]] = msm_cdes - true_cdes
  bias_sat3[[i]] = sat_cdes - true_cdes
}

mean_bias_msm3 <- do.call(rbind, lapply(bias_msm3, function(x) apply(x, 2, mean, na.rm=TRUE)))
sd_bias_msm3 <- do.call(rbind,lapply(bias_msm3, function(x) apply(x, 2, sd, na.rm=TRUE)))
mean_bias_sat3 <- do.call(rbind,lapply(bias_sat3, function(x) apply(x, 2, mean, na.rm=TRUE)))
sd_bias_sat3 <- do.call(rbind, lapply(bias_sat3, function(x) apply(x, 2, sd, na.rm=TRUE)))

pdf('FinalSims_CDE1.pdf')
layout(matrix(c(1,1,2,2,3,4,4,5,6,6,6,6), ncol=4, byrow = TRUE), heights = c(0.45, 0.45,0.1))
par(oma=c(2.5,2.5,3,1), mar=c(2.5,2.5,3,1))
mycol <- rgb(160, 160, 160, max = 255, alpha = 125)
p = 1

seq1 = seq(60,2000,100)
seq2 = seq3 = seq(0,5,0.1)

plot(seq1, rep(0, length(seq1)), type='n', ylim=c(-0.33,0.33), xlim=c(50,tail(seq1,1)+20), lwd=2,
     xaxt='n', yaxt='n', xlab='', ylab= '')
polygon(c(seq1, rev(seq1)), 
        c(mean_bias_msm1[,p]-(1.96*sd_bias_msm1[,p]), rev(mean_bias_msm1[,p]+(1.96*sd_bias_msm1[,p]))),
        col=mycol, border=FALSE)
polygon(c(seq1, rev(seq1)), 
        c(mean_bias_sat1[,p]-(1.96*sd_bias_sat1[,p]), rev(mean_bias_sat1[,p]+(1.96*sd_bias_sat1[,p]))),
        col=mycol, border=FALSE)
lines(seq1, mean_bias_msm1[,p], lwd=2)
lines(seq1, mean_bias_sat1[,p], col='red', lty=2, lwd=2)
axis(1,  tck=0.02, cex.axis=0.7, cex.lab=0.7, mgp=c(0.3, 0.3, 0), lty=1)
axis(2, tck=0.02, cex.axis=0.7, cex.lab=0.7, mgp=c(0.3, 0.3, 0), lty=1, las=2)
title(xlab = expression(n), line = 1.5, cex.lab = 1, ylab='Bias (CDE 1)')
title(line = 2, main="Sample size", font.main=1)
abline(h=0, lty=2)


# Effect of X0
plot(seq2, rep(0, length(seq2)), type='n', ylim=c(-0.2,0.2), lwd=2,
     xaxt='n', yaxt='n', xlab='', ylab= '')
polygon(c(seq2, rev(seq2)), 
        c(mean_bias_msm2[,p]-(1.96*sd_bias_msm2[,p]), rev(mean_bias_msm2[,p]+(1.96*sd_bias_msm2[,p]))),
        col=mycol, border=FALSE)
polygon(c(seq2, rev(seq2)), 
        c(mean_bias_sat2[,p]-(1.96*sd_bias_sat2[,p]), rev(mean_bias_sat2[,p]+(1.96*sd_bias_sat2[,p]))),
        col=mycol, border=FALSE)
lines(seq2, mean_bias_msm2[,p], lwd=2)
lines(seq2, mean_bias_sat2[,p], col='red', lty=2, lwd=2)
axis(1,  tck=0.02, cex.axis=0.7, cex.lab=0.7, mgp=c(0.3, 0.3, 0), lty=1)
axis(2, tck=0.02, cex.axis=0.7, cex.lab=0.7, mgp=c(0.3, 0.3, 0), lty=1, las=2)
axis(3, at=0:5, labels=(0:5)/2, tck=0.02, cex.axis=0.7, cex.lab=0.7, mgp=c(0.3, 0.3, 0), lty=1)
mtext(expression(beta[2]), side = 3, line = 0.75, cex = 0.7)
title(xlab = expression(beta[1]), line = 1.5, cex.lab = 1, ylab='Bias (CDE 1)')
title(line = 2, main="Effect of confounder", font.main=1)
abline(h=0, lty=2)

# PLACE HOLDER
plot(0,0, type="n", axes=FALSE, xaxt='n', yaxt='n', xlab="", ylab="")
#

plot(seq3, rep(0, length(seq3)), type='n', ylim=c(-0.2,.2), lwd=2,
     xaxt='n', yaxt='n', xlab='', ylab= '')
polygon(c(seq3, rev(seq3)), 
        c(mean_bias_msm3[,p]-(1.96*sd_bias_msm3[,p]), rev(mean_bias_msm3[,p]+(1.96*sd_bias_msm3[,p]))),
        col=mycol, border=FALSE)
polygon(c(seq3, rev(seq3)), 
        c(mean_bias_sat3[,p]-(1.96*sd_bias_sat3[,p]), rev(mean_bias_sat3[,p]+(1.96*sd_bias_sat3[,p]))),
        col=mycol, border=FALSE)
lines(seq3, mean_bias_msm3[,p], lwd=2)
lines(seq3, mean_bias_sat3[,p], col='red', lty=2, lwd=2)
axis(1,  tck=0.02, cex.axis=0.7, cex.lab=0.7, mgp=c(0.3, 0.3, 0), lty=1)
axis(2, tck=0.02, cex.axis=0.7, cex.lab=0.7, mgp=c(0.3, 0.3, 0), lty=1, las=2)
axis(3, at=0:5, labels=(0:5)/2, tck=0.02, cex.axis=0.7, cex.lab=0.7, mgp=c(0.3, 0.3, 0), lty=1)
mtext(expression(gamma[2]), side = 3, line = 0.75, cex=0.7)
title(xlab = expression(gamma[1]), line = 1.5, cex.lab = 1, ylab='Bias (CDE 1)')
title(line = 2, main="Omitted confounder", font.main=1)
abline(h=0, lty=2)

# PLACE HOLDER
plot(0,0, type="n", axes=FALSE, xaxt='n', yaxt='n', xlab="", ylab="")
#

par(mar=c(0,0,0,0))
plot(0,0, type="n", axes = FALSE, xlab = "", ylab = "")
legend("center", ncol = 2, 
       legend = c('Saturated', 'MSM'),
       cex=1, bty="n", lty=c(2,1), lwd = c(2,2), col=c("red", "black")) 
dev.off()

## Figure SA.2 
for (i in 1:10){
  wts1 <- prepareIPTW(data = ldata82_1[[i]],
                      Y = "rally82",
                      A = atreat82_1,
                      X.covs =  xcovs82_1 ,
                      t = 2,
                      type.treat = "ordinal")
  ldata82_1[[i]]$wts <- wts1
}


# BALANCE CHECKS

eddM <- eddN <- edmN <- edmM <- rhohM <- rhohN <-pihohM <- pihohN <- edM <- edN <- efM <- efN <- piM <- piN <- pkM <- pkN <- list()

for (i in 1:10){
  mydat <- ldata82_1[[i]]
  
  # Education dad
  modN.edd <- polr(as.factor(qinc_fam) ~ educ_dad, mydat)
  modM.edd <- polr(as.factor(qinc_fam) ~ educ_dad, mydat, weights = wts) 
  
  # Education mom
  modN.edm <- polr(as.factor(qinc_fam) ~ educ_mom, mydat)
  modM.edm <- polr(as.factor(qinc_fam) ~ educ_mom, mydat, weights = wts)
  
  # Race parents
  modN.rhoh <- polr(as.factor(qinc_fam) ~ race_hoh, mydat)
  modM.rhoh <- polr(as.factor(qinc_fam) ~ race_hoh, mydat, weights = wts)
  
  # Political interest parents
  modN.pihoh <- polr(as.factor(qinc_fam) ~ polint_hoh, mydat)
  modM.pihoh <- polr(as.factor(qinc_fam) ~ polint_hoh, mydat, weights = wts)
  
  # Education
  modN.ed <- polr(as.factor(qinc_82) ~ educ_82, mydat)
  modM.ed <- polr(as.factor(qinc_82) ~ educ_82, mydat, weights = wts)
  
  # Political Interest
  modN.pi <- polr(as.factor(qinc_82) ~ polint_82, mydat)
  modM.pi <- polr(as.factor(qinc_82) ~ polint_82, mydat, weights = wts)
  
  # Efficacy HOH
  modN.ef <- polr(as.factor(qinc_82) ~ eff_82, mydat)
  modM.ef <- polr(as.factor(qinc_82) ~ eff_82, mydat, weights = wts)
  
  # Political knowledge
  modN.pk <- polr(as.factor(qinc_82) ~ polknow_82, mydat)
  modM.pk <- polr(as.factor(qinc_82) ~ polknow_82, mydat, weights = wts)
  
  
  pihohM[[i]] <- summary(modM.pihoh)$coef
  pihohN[[i]] <- summary(modN.pihoh)$coef
  eddM[[i]] <- summary(modM.edd)$coef
  eddN[[i]] <- summary(modN.edd)$coef
  edmN[[i]] <- summary(modN.edm)$coef
  edmM[[i]] <- summary(modM.edm)$coef
  rhohM[[i]] <- summary(modM.rhoh)$coef
  rhohN[[i]] <- summary(modN.rhoh)$coef
  pkM[[i]] <- summary(modM.pk)$coef
  pkN[[i]] <- summary(modN.pk)$coef
  efM[[i]] <- summary(modM.ef)$coef
  efN[[i]] <- summary(modN.ef)$coef
  edM[[i]] <- summary(modM.ed)$coef
  edN[[i]] <- summary(modN.ed)$coef
  piM[[i]] <- summary(modM.pi)$coef
  piN[[i]] <- summary(modN.pi)$coef
}

resM <- do.call(rbind,lapply(list(eddM, edmM, rhohM, pihohM, edM, piM, efM, pkM), function(x) poolEstimates(x, c(1,2))[1:2,]))
resM <- resM[seq(1,15,2),]
resN <- do.call(rbind,lapply(list(eddN, edmN, rhohN, pihohN, edN, piN, efN, pkN), function(x) poolEstimates(x, c(1,2))[1:2,]))
resN <- resN[seq(1,15,2),]


# Standardized coefficients
temp.mean <- temp.sd <- list()
for (i in 1:10){
  temp.sd[[i]] <- apply(ldata82_1[[i]][,c("qinc_82", "qinc_fam", unlist(xcovs82_1))],2,
                        function(x) sd(as.numeric(x)))
}
temp.sd <- do.call(rbind, temp.sd)
sds <- as.numeric(apply(temp.sd,2,function(x) mean(x)))


stand.func <- function(beta, sd.x, sd.y){return(beta*(sd.x/sd.y))}
stdbeta_m73 <- stand.func(resM[1:4,1], sds[3:6], sds[2])
stdbeta_m82 <- stand.func(resM[5:8,1], sds[7:10], sds[1])
stdbeta_n73 <- stand.func(resN[1:4,1], sds[3:6], sds[2])
stdbeta_n82 <- stand.func(resN[5:8,1], sds[7:10], sds[1])

pdf("balance_graph.pdf")
par(oma=c(1.5,2.5,0.3,1), mar=c(1.5,2.5,0.3,1))
plot(0,0, type="n",
     xaxt="n", yaxt="n", xlab="", ylab="", main="",
     xlim=c(-0.03,2), ylim=c(-0.7,1.2))

segments(rep(0.5,8), c(stdbeta_n73, stdbeta_n82), rep(1.5,8), 
         c(stdbeta_m73,stdbeta_m82), lwd=2)
points(rep(0.5,8), c(stdbeta_n73, stdbeta_n82), pch=16, cex=1.1)
points(rep(1.5,8), c(stdbeta_m73,stdbeta_m82), pch=21, cex=1.2, col="black", bg="white")

pos1.labels <- c(stdbeta_n73,stdbeta_n82)
pos1.labels[c(7,8)] <- pos1.labels[7:8] + c(-0.02,0.02)
text(c(0.28,0.28, 0.28, 0.18, 0.28, 0.3, 0.28,0.18), pos1.labels ,
     labels = c("Education \n dad", "Education \n mom", "Race HoH", "Political Interest \n HOH", "Education", "Political \n interest","Efficacy", "Political \n Knowledge"), cex = 0.7)
text(1.56, stdbeta_m73[1], labels = "*", cex=1.3, col="red")
text(0.46, pos1.labels+0.02, labels = "*", cex=1.3, col="red")

abline(h=0, lty=2)

# Axis
axis(1, tck=0.005, 
     at = c(0.5,1.5), labels=c("Original", "Pseudo"),
     cex.axis=0.8, cex.lab=0.8, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.005, 
     cex.axis=0.8, cex.lab=0.8, 
     mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
title(ylab = "Standardized coefficient", line = 1.5, cex.lab = 1, xlab="")
legend(1.5,1.2, legend = "p < 0.05", pch = "*", col="red", cex=0.9, bty="n")
dev.off()

sink()

